{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "pd.set_option('display.max_columns', 30)\n",
    "pd.set_option('display.max_rows', None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%ls /TimeseriesDatasets/forecasting/epidemic\n",
    "%ls /TimeseriesDatasets/forecasting/epidemic/EU-Flu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATASET_FILENAME = '/TimeseriesDatasets/forecasting/epidemic/EU-Flu/EUdata.csv'\n",
    "OUTPUT_DIR = '/TimeseriesDatasets/forecasting/epidemic/preprocessed/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(DATASET_FILENAME)\n",
    "print(len(df))\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_countries = df['country'].unique()\n",
    "unique_indicators = df['indicator'].unique()\n",
    "\n",
    "for country in unique_countries:\n",
    "    df_country = df[df['country'] == country]\n",
    "    for indicator in unique_indicators:\n",
    "        if indicator not in df_country['indicator'].values:\n",
    "            continue\n",
    "        dataset_name = f'EU-Flu_{country}_{indicator.replace(\" \", \"_\")}.csv'\n",
    "\n",
    "        df_indicator = df_country[df_country['indicator'] == indicator]\n",
    "        df_indicator.reset_index(drop=True)\n",
    "        df_indicator.drop(columns=['country', 'indicator'], inplace=True)\n",
    "\n",
    "        if not os.path.exists(OUTPUT_DIR):\n",
    "            os.makedirs(OUTPUT_DIR)\n",
    "        df_indicator.to_csv(OUTPUT_DIR + dataset_name, index=False)\n",
    "\n",
    "        print(dataset_name)\n",
    "        print(df_indicator.head())\n",
    "        plt.plot(df_indicator['value'])\n",
    "        plt.title(dataset_name)\n",
    "        plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "moment",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
